# -*- coding: utf-8 -*-
# time: 2025/4/17 11:04
# file: BLIP-2_fh_inference.py
# author: hanson
"""
案例2：多模态模型推理（BLIP-2）

"""

# Install dependencies: pip install accelerate bitsandbytes
# (accelerate is required by device_map="auto" below)
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch

# Load the BLIP-2 processor (image preprocessing + tokenizer) and the model.
# The weights are loaded in half precision and placed automatically across
# the available devices by accelerate (device_map="auto").
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Open the image inside a context manager so the file handle is released, and
# normalise it to 3-channel RGB so grayscale/RGBA/palette files are handled
# the same way as ordinary JPEGs. convert() returns a fully loaded copy, so
# the image stays usable after the file is closed.
with Image.open("./image/33.jpg") as image_file:
    image = image_file.convert("RGB")

# NOTE(review): the BLIP-2 documentation uses a "Question: ... Answer:" prompt
# for visual question answering; the prompt text is left unchanged here.
question = "这张图片里有什么？"

# The processor returns float32 CPU tensors. Move them to the model's device
# and cast the floating-point tensors (pixel_values) to the model's dtype;
# otherwise the float16 model receives float32 input. Integer tensors such as
# input_ids are only moved, not cast.
inputs = processor(images=image, text=question, return_tensors="pt").to(
    model.device, model.dtype
)

# Generate the answer tokens and decode the first (only) sequence to text.
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))